#library(ncdf4)
library(ggplot2)
require(data.table)
library(dplyr)
library(plyr)

#change to full local path to the replication data
wd1 <- "C:\\Users\\YFan\\OneDrive - NORCE\\NFood_replication\\"

#input data
dif.8crop.s <- readRDS(file = paste0(wd1,"/data/RData/climate-yield-changedata-MLR-selected.Rds"))


#==========alternative Generalized additive model (GAM) with nolinear terms for temperature and relative humidity
train.data <- dif.8crop.s[Scenario=="SAI - RCP85" & Irrig==FALSE & Crop=="Sugarcane" & area.wt>20000] 
train.data <- dif.8crop.s[Scenario=="SAI - RCP85" & Irrig==FALSE & Sample==18098]
ggplot(train.data, aes(tsa.dif, yield.dif) ) + #slight nonlinear
#ggplot(train.data, aes(rh.dif, yield.dif) ) +  #slight nonlinear (rh.dif (%) has better distribution than rh.log)
  #ggplot(train.data, aes(ppt.log, yield.dif) ) + #almost flat with ppt.dif, but near linear with ppt.log and ppt.difn
  #ggplot(train.data, aes(radd.dif, yield.dif) ) + #almost linear
  #ggplot(train.data, aes(radi.dif, yield.dif) ) + #almost linear
  geom_point() +
  stat_smooth()
rm(train.data)

#there are both positive and negative temperature changes, and a gam fit shows a non-linear relationship between tsa.dif and yield.dif
library(mgcv) #try GAM model

(crop <- unique(dif.8crop.s$Crop))
(scenario <- unique(dif.8crop.s$Scenario))
gam.irrig <- dif.8crop.s[Irrig==TRUE]  %>% 
  mutate(Crop = factor(Crop, levels=crop)) %>%
  mutate(Scenario = factor(Scenario, levels=scenario)) %>%
  dlply(c("Scenario", "Crop", "Sample"), function(df)gam(yield.dif ~ s(tsa.dif,bs='cr',k=8)+radd.log+radi.log, data = df, na.action = na.omit))
gam.rainfed <- dif.8crop.s[Irrig==FALSE]  %>% 
  mutate(Crop = factor(Crop, levels=crop)) %>%
  mutate(Scenario = factor(Scenario, levels=scenario)) %>%
  dlply(c("Scenario", "Crop", "Sample"), function(df)gam(yield.dif ~ s(tsa.dif,bs='cr',k=8)+s(rh.dif,bs='cr',k=4)+radd.log+radi.log+ppt.log, data = df, na.action = na.omit))


library(broom) #get p-value and other model statistics (AIC, BIC)
stats.gam.i<- as.data.table(ldply(gam.irrig, glance))
stats.gam.r<- as.data.table(ldply(gam.rainfed, glance))#gam, 3 knots (cubic spline)
stats.gam.i[, .(AIC.m=mean(AIC)),by=.(Scenario,Crop)] #gam has smallest AIC (but T effect is negative before 2050)
stats.gam.r[, .(AIC.m=mean(AIC)),by=.(Scenario,Crop)] 


#=====steps to get the partial effects of GAM model:
#1: model_matrix <- predict(gam_y, type = "lpmatrix") #predict only returns the basis functions (part of the smooth term)
#2: betas <- gam_y$coefficients
#3: linear_pred <- model_matrix %*% betas  #multiply the basis function and coeffs to get the real predictions for each predictor variable including smoothed terms

#extract smooth functions and coefficients from GAM models (large matrix)
#format(object.size(gam.matrix.r), units="Gb")
gam.matrix.i <- as.data.table(ldply(gam.irrig, predict, type = "lpmatrix")) #basis smooth functions applied to each climate variable
gam.betas.i <- as.data.table(ldply(gam.irrig, coef)) #coefficients (linear coeffs, applied to smoothed terms)
#names(gam.matrix.i)[-(1:6)] <- c("tsa.dif.s1", "tsa.dif.s2", "tsa.dif.s3") #3 smoothed terms
names(gam.matrix.i)[-(1:6)] <- c("tsa.dif.s1", "tsa.dif.s2", "tsa.dif.s3","tsa.dif.s4", "tsa.dif.s5", "tsa.dif.s6","tsa.dif.s7") #7 smoothed terms
names(gam.betas.i)[-(1:3)] <- c("Intercept", "RD", "RI", "T.s1", "T.s2", "T.s3", "T.s4", "T.s5", "T.s6", "T.s7") #7 smoothed terms for T 
gam.matrix.r <- as.data.table(ldply(gam.rainfed, predict, type = "lpmatrix")) #basis smooth functions applied to each climate variable
names(gam.matrix.r)[-(1:7)] <- c("tsa.dif.s1", "tsa.dif.s2", "tsa.dif.s3", "tsa.dif.s4", "tsa.dif.s5", "tsa.dif.s6","tsa.dif.s7", "rh.dif.s1", "rh.dif.s2", "rh.dif.s3") #7 smoothed terms for T and 3 for RH
gam.betas.r <- as.data.table(ldply(gam.rainfed, coef)) #coefficients (linear coeffs, applied to smoothed terms)
names(gam.betas.r)[-(1:3)] <- c("Intercept", "RD", "RI", "P", "T.s1", "T.s2", "T.s3", "T.s4", "T.s5", "T.s6", "T.s7", "RH.s1", "RH.s2", "RH.s3") 

gam.predict.i <- gam.matrix.i[gam.betas.i, .(Intercept=i.Intercept, RD.log=i.RD*radd.log, RI.log=i.RI*radi.log, 
                                             T.log=tsa.dif.s1*i.T.s1 + tsa.dif.s2*i.T.s2 + tsa.dif.s3*i.T.s3 +tsa.dif.s4*i.T.s4 + tsa.dif.s5*i.T.s5 + tsa.dif.s6*i.T.s6 + tsa.dif.s7*i.T.s7
                                             ), by=.EACHI, on=.(Scenario,Crop,Sample)]
gam.predict.r <- gam.matrix.r[gam.betas.r, .(Intercept=i.Intercept, RD.log=i.RD*radd.log, RI.log=i.RI*radi.log, P.log=i.P*ppt.log,
                                          T.log=tsa.dif.s1*i.T.s1 + tsa.dif.s2*i.T.s2 + tsa.dif.s3*i.T.s3 +tsa.dif.s4*i.T.s4 + tsa.dif.s5*i.T.s5 + tsa.dif.s6*i.T.s6 + tsa.dif.s7*i.T.s7,
                                          RH.log=rh.dif.s1*i.RH.s1 + rh.dif.s2*i.RH.s2 + rh.dif.s3*i.RH.s3), by=.EACHI, on=.(Scenario,Crop,Sample)]
gam.predict.i <- gam.predict.i[, Tot.log:=Intercept+T.log+RD.log+RI.log] #same as fitted.values
gam.predict.r <- gam.predict.r[, Tot.log:=Intercept+T.log+RD.log+RI.log+P.log+RH.log] #same as fitted.values
#NOTE: the sample ID in gam.predict.r is reordered per Scenario and per Crop 
#needs to reorder the sample ID from dif.8crop.s so that the years are aligned (otherwise the sample-year combination is wrong)
gam.year.i <- dif.8crop.s[Irrig==TRUE, .(Year,Crop,Sample,Scenario)]
(crop <- unique(dif.8crop.s$Crop))
(scenario <- unique(dif.8crop.s$Scenario))
gam.year.i <- gam.year.i %>% 
  mutate(Crop = factor(Crop, levels=crop)) %>%
  mutate(Scenario = factor(Scenario, levels=scenario)) %>%
  setorder(Scenario,Crop,Sample) 
gam.year.r <- dif.8crop.s[Irrig==FALSE, .(Year,Crop,Sample,Scenario)]
gam.year.r <- gam.year.r %>% 
  mutate(Crop = factor(Crop, levels=crop)) %>%
  mutate(Scenario = factor(Scenario, levels=scenario)) %>%
  setorder(Scenario,Crop,Sample) 
tail(gam.year.i);tail(gam.predict.i) #now the Sample and Year sequence from gam.year can be copied to gam.predict.r
tail(gam.year.r);tail(gam.predict.r)
gam.predict.i$Year <- gam.year.i$Year
gam.predict.r$Year <- gam.year.r$Year

# save(list = c("gam.matrix.i", "gam.betas.i", "gam.predict.i"), 
#      file = paste0(wd1,"/data/RData/Fig.S9.GAM-pergrid-coeff-matrix-predict-irrig.Rdata"))
# save(list = c("gam.matrix.r", "gam.betas.r", "gam.predict.r"), 
#      file = paste0(wd1,"/data/RData/Fig.S9.GAM-pergrid-coeff-matrix-predict-rainfed.Rdata"))

save(list = c("gam.predict.i"), 
     file = paste0(wd1,"/data/RData/Fig.S9.GAM-pergrid-predict-irrig.Rdata"))
save(list = c("gam.predict.r"), 
     file = paste0(wd1,"/data/RData/Fig.S9.GAM-pergrid-predict-rainfed.Rdata"))


rm(gam.irrig, gam.rainfed) #6.1GB
rm(gam.matrix.i, gam.betas.i)
rm(gam.matrix.r, gam.betas.r)


# load(paste0(wd1,"/data/RData/Fig.S9.GAM-pergrid-predict-irrig.Rdata"))
# load(paste0(wd1,"/data/RData/Fig.S9.GAM-pergrid-predict-rainfed.Rdata"))


#==========Prepare Rdata for Generalized additive model (GAM) with nolinear terms for temperature and relative humidity
#dif.8crop.s <- readRDS(file = paste0(wd1,"/data/RData/climate-yield-changedata-MLR-selected.Rds"))
(crop <- unique(dif.8crop.s$Crop))
(scenario <- unique(dif.8crop.s$Scenario))

#=========Supplement Figure S9 with nonlinear terms for T and RH predicted by GAM model
str(gam.predict.r)
nrow(gam.predict.r); nrow(dif.8crop.s[Irrig==FALSE])
gam.predict.i$Irrig <- TRUE
gam.predict.r$Irrig <- FALSE
gam.predict <- rbind(gam.predict.i, gam.predict.r, fill=TRUE)
rm(gam.predict.i, gam.predict.r)

gam.predict <- gam.predict[dif.8crop.s, area:=i.area.wt, by=.EACHI, on=.(Scenario,Crop,Irrig,Sample,Year)]
gam.predict <- gam.predict[dif.8crop.s, yield0:=i.yield0, by=.EACHI, on=.(Scenario,Crop,Irrig,Sample,Year)]
gam.predict <- gam.predict[dif.8crop.s[Scenario=="RCP45 - RCP85"], 
                           Tot.log2:=Tot.log+i.co2.log, by=.EACHI, on=.(Scenario,Crop,Irrig,Sample,Year)]
gam.predict <- gam.predict[dif.8crop.s[Scenario=="RCP45 - RCP85"], 
                           co2.log:=i.co2.log, by=.EACHI, on=.(Scenario,Crop,Irrig,Sample,Year)]
gam.effect.wm <- gam.predict[, .(T.log=weighted.mean(T.log, area, na.rm = T), RD.log=weighted.mean(RD.log, area, na.rm = T), RI.log=weighted.mean(RI.log, area, na.rm = T),
                                 RH.log=weighted.mean(RH.log, area, na.rm = T), P.log=weighted.mean(P.log, area, na.rm = T), Tot.log=weighted.mean(Tot.log, area, na.rm = T),
                                 co2.log=weighted.mean(co2.log, area, na.rm = T), Tot.log2=weighted.mean(Tot.log2, area, na.rm = T),
                                 yield0=weighted.mean(yield0, area, na.rm = T), area=sum(area, na.rm=T)),
                             by =.(Scenario,Crop,Irrig,Year)]
gam.effect.wm$Level <- "Mean"


save(list = c("gam.effect.wm"), file = paste0(wd1,"/data/RData/Fig.S9.GAM-effect-wtm.Rdata"))



